source("Downstream.v2.R")

# loading libraries
library(Seurat)
library(rrrSingleCellUtils)
library(ggplot2)
library(msigdbr)
library(dplyr)

Load Seurat objects

Load Seurat objects here so only have to complete once (commented out elsewhere).

Load Osteoblasts

# Create Seurat objects and perform initial QC.  Label original source.  osteoblasts
osteoblasts <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0031/outs/filtered_feature_bc_matrix/")


osteoblasts <- subset(osteoblasts, subset = nCount_RNA < 20000 & percent.mt < 10)

osteoblasts$src <- "osteoblasts"
osteoblasts$model <- "osteoblasts"

Load OS17

# OS17 Culture
os17_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0016/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os17_cx_raw <- subset(os17_cx_raw, subset = nFeature_RNA > 3500 & nCount_RNA < 50000 & percent.mt <
    15)

os17_cx_raw$src <- "Culture"
os17_cx_raw$model <- "OS-17"

## Tibia
os17_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0018xS0028/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os17_tib_raw <- subset(os17_tib_raw, subset = nFeature_RNA > 3000 & nCount_RNA < 60000 & percent.mt <
    18)

os17_tib_raw$src <- "Tibia"
os17_tib_raw$model <- "OS-17"

## Lung double check usage throughout rest of code - Emily (fixed issue where coming from
## wrong file)
os17_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home2/gdrobertslab/lab/Counts/S0024xS0029/outs/filtered_feature_bc_matrix",
    species_pattern = "^hg19-")


os17_lung_raw <- subset(os17_lung_raw, subset = nFeature_RNA > 1250 & nCount_RNA < 60000 & percent.mt <
    25)

os17_lung_raw$src <- "Lung"
os17_lung_raw$model <- "OS-17"

Process OS17 culture for Figure 1

os17_cx <- NormalizeData(os17_cx_raw) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os17_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 117232
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds

DimPlot(os17_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)


# Regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os17_cx <- CellCycleScoring(object = os17_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os17_cx <- ScaleData(object = os17_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17_cx),
    block.size = 10000)

os17_cx <- RunPCA(os17_cx, pc.genes = os17_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8908
## Number of communities: 5
## Elapsed time: 0 seconds

# Find optimal clustering resolution
os17_cx_nc <- nRes(os17_cx, res = seq(from = 0.1, to = 0.2, by = 0.05))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 5
## Elapsed time: 0 seconds

plot <- pSil(os17_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds

## NULL
plot
## NULL

# With res = 0.15, cells in cluster 3 do not have any significantly different genes that are
# deferentially regulated Therefore, we decided to proceed with res = 0.1
os17_cx <- FindClusters(os17_cx, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 119731
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds

if (!file.exists("Data")) {
    dir.create("Data")
}
save(os17_cx, file = "Data/os17_cx_CCR.RData")

Add lineage tags to metadata for Figure 5

For lineage analysis in Figure 5.

# Plot histograms of lineage tracing tags in the Seurat objects

if (!file.exists("Data/S0024xS0029.ltbc")) {
    s <- c("S0016", "S0018xS0028", "S0024xS0029")
    for (i in s) {
        gen_cellecta_bc_data(file = paste0("/gpfs0/home/gdrobertslab/lab/Counts/", i, "/outs/possorted_genome_bam.bam"),
            verbose = TRUE, output = paste0("Data/", i, ".ltbc"), samtools_module = "SAMtools")
    }

    for (i in s) {
        ltbc <- read.table(paste0("Data/", i, ".ltbc"), header = T, sep = "\t")
        if (substr(ltbc[[1, 1]], 1, 5) == "CB:Z:") {
            ltbc$cid <- substr(ltbc$cid, 6, 22)
            write.table(ltbc, paste0("Data/", i, ".ltbc"), sep = "\t")
        }
    }
}

os17_cx_raw <- process_ltbc(os17_cx_raw, cid_lt = read.table("Data/S0016.ltbc", header = T, sep = "\t"),
    histogram = F, relative = T)

os17_tib_raw <- process_ltbc(os17_tib_raw, cid_lt = read.table("Data/S0018xS0028.ltbc", sep = "\t"),
    histogram = F, relative = T)

os17_lung_raw <- process_ltbc(os17_lung_raw, cid_lt = read.table("Data/S0024xS0029.ltbc", sep = "\t"),
    histogram = F, relative = T)

Create OS17 object for lineage analysis in Figure 5

# Subset all to 2800 cells in each condition
os17_cx <- subset(os17_cx_raw, cells = sample(Cells(os17_cx_raw), 2800))
os17.tib <- subset(os17_tib_raw, cells = sample(Cells(os17_tib_raw), 2800))
os17.lung <- subset(os17_lung_raw, cells = sample(Cells(os17_lung_raw), 2800))

# Merge into a single Seurat object
os17 <- merge(os17_cx, y = c(os17.tib, os17.lung), add.cell_ids = c("Culture", "Tibia", "Lung"),
    project = "LineageTracing")

# Process and cluster
os17 <- NormalizeData(os17) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os17@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8400
## Number of edges: 289959
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9048
## Number of communities: 8
## Elapsed time: 0 seconds

# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

os17 <- CellCycleScoring(object = os17, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

os17 <- ScaleData(object = os17, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17),
    block.size = 10000)

os17 <- RunPCA(os17, pc.genes = os17@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 8400
## Number of edges: 280874
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8977
## Number of communities: 7
## Elapsed time: 0 seconds

DimPlot(os17, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS17 Clusters") +
    scale_color_npg(alpha = 0.7)


# Plot the data
set.seed(100)
cell_ids <- sample(colnames(os17))

DimPlot(os17, reduction = "umap", group.by = "src", pt.size = 1, label = F, order = cell_ids) +
    coord_fixed() + ggtitle("OS17 by Source") + scale_color_npg()


save(os17, file = "Data/os17.RData")

Load t143b

# t143b Culture
t143b_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0017/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


t143b_cx_raw <- subset(t143b_cx_raw, subset = nFeature_RNA > 4000 & nCount_RNA < 74000 & percent.mt <
    17)

t143b_cx_raw$src <- "Culture"
t143b_cx_raw$model <- "143B"

## Tibia
t143b_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0052/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


t143b_tib_raw <- subset(t143b_tib_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
    22)

t143b_tib_raw$src <- "Tibia"
t143b_tib_raw$model <- "143B"

# Lung
t143b_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0019-143B-lung/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


t143b_lung_raw <- subset(t143b_lung_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
    22)

t143b_lung_raw$src <- "Lung"
t143b_lung_raw$model <- "143B"

Load NCH-OS2

# NCHOS2 Flank
os2_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0076/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os2_cx_raw <- subset(os2_cx_raw, subset = nCount_RNA < 36000 & percent.mt < 50)

os2_cx_raw$src <- "Flank"
os2_cx_raw$model <- "NCH-OS-2"

## Tibia
os2_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0042/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os2_tib_raw <- subset(os2_tib_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 40000 & percent.mt <
    20)

os2_tib_raw$src <- "Tibia"
os2_tib_raw$model <- "NCH-OS-2"

## Lung
os2_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0041/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os2_lung_raw <- subset(os2_lung_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 60000 & percent.mt <
    30)

os2_lung_raw$src <- "Lung"
os2_lung_raw$model <- "NCH-OS-2"

Load NCH-OS7

# NCHOS7 Flank
os7_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0043/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os7_cx_raw <- subset(os7_cx_raw, subset = nCount_RNA < 50000 & percent.mt < 25)

os7_cx_raw$src <- "Flank"
os7_cx_raw$model <- "NCH-OS-7"

## Tibia
os7_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0034/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os7_tib_raw <- subset(os7_tib_raw, subset = nFeature_RNA > 2000 & nCount_RNA < 35000 & percent.mt <
    14)

os7_tib_raw$src <- "Tibia"
os7_tib_raw$model <- "NCH-OS-7"

## Lung
os7_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0055/outs/filtered_feature_bc_matrix/",
    species_pattern = "^hg19-")


os7_lung_raw <- subset(os7_lung_raw, subset = nCount_RNA < 42000 & percent.mt < 20)

os7_lung_raw$src <- "Lung"
os7_lung_raw$model <- "NCH-OS-7"

Process NCHS-OS7 flank for Figure 1

os7_cx <- NormalizeData(os7_cx_raw) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os7_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 68728
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds

DimPlot(os7_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)


# Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

os7_cx <- CellCycleScoring(object = os7_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

os7_cx <- ScaleData(object = os7_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os7_cx),
    block.size = 10000)

os7_cx <- RunPCA(os7_cx, pc.genes = os7_cx@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8676
## Number of communities: 5
## Elapsed time: 0 seconds

# Find optimal clustering resolution
os7_cx_nc <- nRes(os7_cx, res = seq(from = 0.1, to = 0.15, by = 0.01))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9248
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9195
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9123
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9095
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds

plot <- pSil(os7_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds

## NULL
plot
## NULL

os7_cx <- FindClusters(os7_cx, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 69766
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds

save(os7_cx, file = "Data/os7_cx_CCR.RData")

Save Raw Objects

# Save raw objects

# Make lists for easy loop saving
sample_list <- c(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
    t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw)

sample_names <- c("osteoblasts", "os17_cx_raw", "os17_tib_raw", "os17_lung_raw", "t143b_cx_raw",
    "t143b_tib_raw", "t143b_lung_raw", "os2_cx_raw", "os2_tib_raw", "os2_lung_raw", "os7_cx_raw",
    "os7_tib_raw", "os7_lung_raw")

# Correlate sample names and sample labels
names(sample_list) <- sample_names

# Save each raw object
for (item in sample_names) {
    # Save each sample as an individual Seurat object with proper name
    assign(item, sample_list[[item]])
    save(list = item, file = paste("Data/", item, ".RData", sep = ""))
}

Merge Objects

Merge objects, process, and cell cycle regress.

# Merge into a single Seurat object
OS <- merge(osteoblasts, y = c(os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
    t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw),
    add.cell_ids = c("Osteoblasts", "OS-17_Culture", "OS-17_Tibia", "OS-17_Lung", "143B_Culture",
        "143B_Tibia", "143B_Lung", "NCH-OS-2_Flank", "NCH-OS-2_Tibia", "NCH-OS-2_Lung", "NCH-OS-7_Flank",
        "NCH-OS-7_Tibia", "NCH-OS-7_Lung"), project = "Heterogeneity")

# Process and cluster
OS <- NormalizeData(OS) %>%
    FindVariableFeatures(selection.method = "vst") %>%
    ScaleData() %>%
    RunPCA(pc.genes = os.17@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 54300
## Number of edges: 1853982
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9614
## Number of communities: 10
## Elapsed time: 14 seconds

# rm(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
# t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw,
# os7_lung_raw)

# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

OS <- CellCycleScoring(object = OS, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

OS <- ScaleData(object = OS, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(OS),
    block.size = 10000)

OS <- RunPCA(OS, pc.genes = OS@var.genes, npcs = 20) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 54300
## Number of edges: 1824736
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9610
## Number of communities: 9
## Elapsed time: 14 seconds

save(OS, file = "Data/OS_merged_postCCR.RData")

OS$model <- as.factor(OS$model)
OS$model <- factor(as.factor(OS$model), levels = c("OS-17", "143B", "NCH-OS-2", "NCH-OS-7", "Osteoblasts"))

# Plot the data
DimPlot(OS, reduction = "umap", group.by = "model", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Model") +
    scale_color_npg(alpha = 1)


DimPlot(OS, reduction = "umap", group.by = "src", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Source")


DimPlot(OS, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Clusters") +
    scale_color_npg(alpha = 0.7)

List Objects and Process

List objects, process, and cell cycle regress. Used in Figure 3B - ITH code.

# Create List to compute ITH scores (subset to 1500 cells for each)
OS.list_1500 <- list(osteoblasts = osteoblasts, OS17.cx = os17_cx_raw, OS17.Tibia = os17_tib_raw,
    OS17.Lung = os17_lung_raw, t143B.cx = t143b_cx_raw, t143B.Tibia = t143b_tib_raw, t143B.Lung = t143b_lung_raw,
    OS2.Flank = os2_cx_raw, OS2.Tibia = os2_tib_raw, OS2.Lung = os2_lung_raw, OS7.Flank = os7_cx_raw,
    OS7.Tibia = os7_tib_raw, OS7.Lung = os7_lung_raw)

for (i in 1:length(OS.list_1500)) {
    OS.list_1500[[i]] <- NormalizeData(OS.list_1500[[i]]) %>%
        FindVariableFeatures(selection.method = "vst") %>%
        ScaleData() %>%
        RunPCA(pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3) %>%
        subset(cells = sample(Cells(OS.list_1500[[i]]), 1500))
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5735
## Number of edges: 201174
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8642
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3178
## Number of edges: 117232
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2849
## Number of edges: 103732
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4574
## Number of edges: 156586
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8373
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3363
## Number of edges: 123143
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8215
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6763
## Number of edges: 226939
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8477
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5134
## Number of edges: 181392
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8631
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6749
## Number of edges: 222345
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8705
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4285
## Number of edges: 148107
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8520
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1648
## Number of edges: 57506
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8356
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1998
## Number of edges: 68728
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 2891
## Number of edges: 102969
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8567
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5133
## Number of edges: 177023
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8680
## Number of communities: 9
## Elapsed time: 0 seconds

# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

for (i in 1:length(OS.list_1500)) {
    OS.list_1500[[i]] <- CellCycleScoring(object = OS.list_1500[[i]], s.features = s.genes, g2m.features = g2m.genes,
        set.ident = TRUE)

    OS.list_1500[[i]] <- ScaleData(object = OS.list_1500[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
        features = VariableFeatures(OS.list_1500[[i]]), block.size = 10000)  # Change to VariableFeatures()

    OS.list_1500[[i]] <- RunPCA(OS.list_1500[[i]], pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 61069
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8390
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 61975
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8650
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56704
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8091
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56977
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7771
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 58610
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7691
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 60734
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7791
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56073
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7987
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 52995
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8059
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 52729
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7782
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 51652
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7883
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 51919
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 55164
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8026
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 1500
## Number of edges: 56741
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8093
## Number of communities: 4
## Elapsed time: 0 seconds

save(OS.list_1500, file = "Data/OSlist_1500_CCR.RData")

List objects, process, and cell cycle regress. Used in Figure 3 C-D code.

# Create list for usage in Figure 3
tibia_list <- list(OS17.Tibia = os17_tib_raw, t143B.Tibia = t143b_tib_raw, OS2.Tibia = os2_tib_raw,
    OS7.Tibia = os7_tib_raw)

lung_list <- list(OS17.Lung = os17_lung_raw, t143B.Lung = t143b_lung_raw, OS2.Lung = os2_lung_raw,
    OS7.Lung = os7_lung_raw)

OS_list <- list()

for (i in 1:length(tibia_list)) {
    message(i, head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1))
    # Subset number of cells in each Seurat object to the least in the group
    a <- table(tibia_list[[i]]$orig.ident)
    b <- table(lung_list[[i]]$orig.ident)
    list <- c(a, b)
    min <- min(list)

    tibia_list[[i]] <- subset(tibia_list[[i]], cells = sample(Cells(tibia_list[[i]]), min))

    lung_list[[i]] <- subset(lung_list[[i]], cells = sample(Cells(lung_list[[i]]), min))

    title <- head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1)
    title <- gsub("-", "", title)

    OS_list[[title]] <- merge(tibia_list[[i]], y = c(lung_list[[i]]), add.cell_ids = c(head(paste(tibia_list[[i]]$model,
        "_", tibia_list[[i]]$src, sep = ""), 1), head(paste(lung_list[[i]]$model, "_", lung_list[[i]]$src,
        sep = ""), 1)), project = "Heterogeneity")
}
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

for (i in 1:length(OS_list)) {
    OS_list[[i]] <- NormalizeData(OS_list[[i]]) %>%
        FindVariableFeatures(selection.method = "vst") %>%
        ScaleData() %>%
        RunPCA(pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3)

    OS_list[[i]] <- CellCycleScoring(object = OS_list[[i]], s.features = s.genes, g2m.features = g2m.genes,
        set.ident = TRUE)

    OS_list[[i]] <- ScaleData(object = OS_list[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
        features = VariableFeatures(OS_list[[i]]), block.size = 10000)  # Change to VariableFeatures()

    OS_list[[i]] <- RunPCA(OS_list[[i]], pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
        RunUMAP(reduction = "pca", dims = 1:20) %>%
        FindNeighbors(reduction = "pca", dims = 1:20) %>%
        FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5698
## Number of edges: 195555
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8596
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5698
## Number of edges: 190346
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8444
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10268
## Number of edges: 352757
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8850
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 10268
## Number of edges: 343259
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8657
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3296
## Number of edges: 114815
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 3296
## Number of edges: 110588
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8283
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5782
## Number of edges: 203134
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8971
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5782
## Number of edges: 201063
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8855
## Number of communities: 5
## Elapsed time: 0 seconds

save(OS_list, file = "Data/OS_list_CCR.RData")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux 8.4 (Ootpa)
## 
## Matrix products: default
## BLAS:   /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRblas.so
## LAPACK: /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] cluster_2.1.4            msigdbr_7.5.1            rrrSingleCellUtils_0.5.0
##  [4] nichenetr_1.1.0          RColorBrewer_1.1-3       ggplot2_3.3.6           
##  [7] reshape2_1.4.4           sp_1.4-7                 SeuratObject_4.1.0      
## [10] Seurat_4.1.1             reticulate_1.25          Matrix_1.5-3            
## [13] dplyr_1.0.9              cowplot_1.1.1            ggsci_2.9               
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2            tidyselect_1.1.2      htmlwidgets_1.5.4    
##   [4] grid_4.1.0            Rtsne_0.16            pROC_1.18.0          
##   [7] munsell_0.5.0         codetools_0.2-18      ica_1.0-3            
##  [10] future_1.29.0         miniUI_0.1.1.1        withr_2.5.0          
##  [13] spatstat.random_3.0-1 colorspace_2.0-3      progressr_0.11.0     
##  [16] highr_0.9             knitr_1.39            rstudioapi_0.13      
##  [19] stats4_4.1.0          ROCR_1.0-11           tensor_1.5           
##  [22] listenv_0.8.0         labeling_0.4.2        polyclip_1.10-4      
##  [25] farver_2.1.1          parallelly_1.32.1     vctrs_0.4.1          
##  [28] generics_0.1.3        ipred_0.9-12          xfun_0.31            
##  [31] randomForest_4.7-1.1  R6_2.5.1              doParallel_1.0.17    
##  [34] clue_0.3-63           bitops_1.0-7          spatstat.utils_3.0-1 
##  [37] assertthat_0.2.1      promises_1.2.0.1      scales_1.2.0         
##  [40] nnet_7.3-18           rgeos_0.5-9           gtable_0.3.1         
##  [43] globals_0.16.2        goftest_1.2-3         timeDate_3043.102    
##  [46] rlang_1.0.2           GlobalOptions_0.1.2   splines_4.1.0        
##  [49] lazyeval_0.2.2        ModelMetrics_1.2.2.2  spatstat.geom_3.0-3  
##  [52] checkmate_2.1.0       yaml_2.3.5            abind_1.4-5          
##  [55] backports_1.4.1       httpuv_1.6.6          Hmisc_4.7-0          
##  [58] caret_6.0-92          DiagrammeR_1.0.9      tools_4.1.0          
##  [61] lava_1.6.10           ellipsis_0.3.2        spatstat.core_2.4-4  
##  [64] jquerylib_0.1.4       proxy_0.4-27          BiocGenerics_0.40.0  
##  [67] ggridges_0.5.3        Rcpp_1.0.9            plyr_1.8.8           
##  [70] base64enc_0.1-3       visNetwork_2.1.0      purrr_0.3.4          
##  [73] rpart_4.1.16          deldir_1.0-6          pbapply_1.6-0        
##  [76] GetoptLong_1.0.5      S4Vectors_0.32.4      zoo_1.8-10           
##  [79] ggrepel_0.9.1         magrittr_2.0.3        RSpectra_0.16-1      
##  [82] data.table_1.14.6     scattermore_0.8       circlize_0.4.15      
##  [85] lmtest_0.9-40         RANN_2.6.1            fitdistrplus_1.1-8   
##  [88] matrixStats_0.63.0    hms_1.1.1             patchwork_1.1.1      
##  [91] mime_0.12             evaluate_0.18         xtable_1.8-4         
##  [94] jpeg_0.1-9            IRanges_2.28.0        gridExtra_2.3        
##  [97] shape_1.4.6           compiler_4.1.0        tibble_3.1.7         
## [100] KernSmooth_2.23-20    crayon_1.5.2          htmltools_0.5.2      
## [103] mgcv_1.8-41           later_1.3.0           tzdb_0.3.0           
## [106] Formula_1.2-4         tidyr_1.2.0           lubridate_1.8.0      
## [109] DBI_1.1.3             formatR_1.12          ComplexHeatmap_2.10.0
## [112] MASS_7.3-58.1         babelgene_22.3        readr_2.1.2          
## [115] cli_3.4.1             parallel_4.1.0        gower_1.0.0          
## [118] igraph_1.3.1          pkgconfig_2.0.3       foreign_0.8-83       
## [121] plotly_4.10.0         spatstat.sparse_3.0-0 recipes_0.2.0        
## [124] foreach_1.5.2         bslib_0.3.1           hardhat_0.2.0        
## [127] prodlim_2019.11.13    stringr_1.4.0         digest_0.6.30        
## [130] sctransform_0.3.3     RcppAnnoy_0.0.20      spatstat.data_3.0-0  
## [133] rmarkdown_2.14        leiden_0.4.2          htmlTable_2.4.0      
## [136] uwot_0.1.11           shiny_1.7.1           rjson_0.2.21         
## [139] lifecycle_1.0.1       nlme_3.1-160          jsonlite_1.8.3       
## [142] viridisLite_0.4.0     limma_3.50.3          fansi_1.0.3          
## [145] pillar_1.7.0          lattice_0.20-45       fastmap_1.1.0        
## [148] httr_1.4.4            survival_3.3-1        glue_1.6.2           
## [151] fdrtool_1.2.17        png_0.1-7             iterators_1.0.14     
## [154] class_7.3-20          stringi_1.7.6         sass_0.4.1           
## [157] latticeExtra_0.6-29   caTools_1.18.2        irlba_2.3.5.1        
## [160] e1071_1.7-12          future.apply_1.9.0